###change
library(leaflet)
library(readxl)
library(dplyr)
library(reticulate)
# Read the Excel workbook into `maindata`; `tmp` starts out as the full table
# and is what the first leaflet map is drawn from.
maindata <- read_excel(path = "maindata.xlsx")
tmp <- maindata
Let's look at how many unique values each column has.
# Count the distinct values in every column of `maindata`.
# vapply() pins the result to exactly one integer per column, whereas sapply()
# chooses its return type from the input and would silently hand back a list
# for a zero-column data frame.
vapply(maindata, function(column) length(unique(column)), integer(1))
id log_price property_type room_type
74111 767 35 3
amenities accommodates bathrooms bed_type
67122 16 18 5
cancellation_policy cleaning_fee city Description
5 2 6 73469
first_review host_has_profile_pic host_identity_verified host_response_rate
2555 3 3 81
host_since instant_bookable last_review lat
3088 2 1372 74058
long name neighbourhood number_of_reviews
73973 73331 620 371
review_scores_rating thumbnail_url zipcode bedrooms
55 65884 669 12
beds
19
# Unique cancellation policies; the result keeps the group_by() grouping.
maindata %>%
  group_by(cancellation_policy) %>%
  select(cancellation_policy) %>%
  unique()

# Row count and share of the total for each cancellation policy.
maindata %>%
  count(cancellation_policy) %>%
  mutate(relative_freq = n / sum(n))

# Unique bed types, grouped by the column itself.
maindata %>%
  group_by(bed_type) %>%
  select(bed_type) %>%
  unique()
library(dplyr)
# Row count and share of the total for each property type.
maindata %>%
  count(property_type) %>%
  mutate(relative_freq = n / sum(n))

# Same breakdown by bed type, restricted to rows whose city is "NYC".
maindata %>%
  filter(city == "NYC") %>%
  count(bed_type) %>%
  mutate(relative_freq = n / sum(n))
library(ggplot2)
# Box plots of log_price split by cancellation policy, bed type and
# cleaning fee, one plot per column.
ggplot(maindata, mapping = aes(x = cancellation_policy, y = log_price)) +
  geom_boxplot() +
  ggtitle("Boxplot for log_price vs cancellation policy")

ggplot(maindata, mapping = aes(x = bed_type, y = log_price)) +
  geom_boxplot() +
  ggtitle("Boxplot for log_price vs bed type")

ggplot(maindata, mapping = aes(x = cleaning_fee, y = log_price)) +
  geom_boxplot() +
  ggtitle("Boxplot for log_price vs cleaning fee")
Do some hypothesis testing.
# Mosaic display of cleaning_fee crossed with instant_bookable;
# shade = TRUE switches on vcd's tile shading.
vcd::mosaic(
  ~ cleaning_fee + instant_bookable,
  data = maindata,
  shade = TRUE
)
First, let us plot the different properties, using the latitude and longitude information given in our dataset.
# Draw every row of `tmp` as a red, outline-free circle marker at its
# (long, lat) position, on top of the default and OpenStreetMap.BZH tiles.
m <- leaflet(data = tmp)
m <- addTiles(m)
m <- addProviderTiles(m, provider = "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  color = "red",
  stroke = FALSE
)
m
From the map above, we can see that the properties listed in our dataset are from six different locations: Los Angeles, New York, DC, Boston, Chicago, and San Francisco.
# Map the rows whose city is "NYC", sizing and colouring each marker by
# log_price.
rows <- maindata$city == "NYC"
tmp <- maindata[rows, ]

# Colour bins with breaks at 2, 3.5, 5, 6.5 and 8; NA is drawn transparent.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m
Talk about which parts of NYC have the highest log_price and which have the lowest.
# Map the rows whose city is "Boston", sizing and colouring each marker by
# log_price.
rows <- maindata$city == "Boston"
tmp <- maindata[rows, ]

# Colour bins with breaks at 2, 3.5, 5, 6.5 and 8; NA is drawn transparent.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m
# Map the rows whose city is "DC", sizing and colouring each marker by
# log_price.
rows <- maindata$city == "DC"
tmp <- maindata[rows, ]

# Colour bins with breaks at 2, 3.5, 5, 6.5 and 8; NA is drawn transparent.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m
# Map the rows whose city is "SF", sizing and colouring each marker by
# log_price.
rows <- maindata$city == "SF"
tmp <- maindata[rows, ]

# Colour bins with breaks at 2, 3.5, 5, 6.5 and 8; NA is drawn transparent.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m
# Map the rows whose city is "LA", sizing and colouring each marker by
# log_price.
rows <- maindata$city == "LA"
tmp <- maindata[rows, ]

# Colour bins with breaks at 2, 3.5, 5, 6.5 and 8; NA is drawn transparent.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m
We can see that there are high-priced properties along the edge.
# Map the rows whose city is "Chicago", sizing and colouring each marker by
# log_price.
rows <- maindata$city == "Chicago"
tmp <- maindata[rows, ]

# Colour bins with breaks at 2, 3.5, 5, 6.5 and 8; NA is drawn transparent.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m
# Map the rows whose city is "Chicago", colouring each marker by
# property_type and sizing it by log_price.
rows <- maindata$city == "Chicago"
tmp <- maindata[rows, ]

# property_type is treated as a categorical column here, so the palette is
# built with colorFactor(); colorBin() with numeric breaks only accepts
# numeric input and cannot colour category labels.
mypalette <- colorFactor(
  palette = "YlOrBr",
  domain = tmp$property_type,
  na.color = "transparent"
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    # The coordinate columns in this dataset are named `long` and `lat`.
    lng = ~long,
    lat = ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(property_type),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~property_type,
    opacity = 0.9,
    title = "Property type",
    position = "bottomright"
  )
# Print the widget so the map is actually displayed.
m
Error in eval(f[[2]], metaData(data), environment(f)) :
object 'longitute' not found